{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Practical Exercises on Dimensionality Reduction\n",
    "\n",
    "### Exercise 1: PCA with Logistic Regression\n",
    "Compare classification performance with and without PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load libraries\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "# Split data\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    digits.data, digits.target, test_size=0.2, random_state=2024\n",
    ")\n",
    "\n",
    "# Pipeline without PCA\n",
    "pipeline_no_pca = Pipeline([\n",
    "    ('scaler', StandardScaler()),\n",
    "    ('clf', LogisticRegression(max_iter=1000))\n",
    "])\n",
    "\n",
    "# Pipeline with PCA\n",
    "pipeline_with_pca = Pipeline([\n",
    "    ('scaler', StandardScaler()),\n",
    "    ('pca', PCA(n_components=0.95)),  # Keep 95% of variance\n",
    "    ('clf', LogisticRegression(max_iter=1000))\n",
    "])\n",
    "\n",
    "# Fit and evaluate both pipelines\n",
    "pipeline_no_pca.fit(X_train, y_train)\n",
    "pipeline_with_pca.fit(X_train, y_train)\n",
    "\n",
    "# Print results\n",
    "print(\"Accuracy without PCA:\", \n",
    "      accuracy_score(y_test, pipeline_no_pca.predict(X_test)))\n",
    "print(\"Accuracy with PCA:\", \n",
    "      accuracy_score(y_test, pipeline_with_pca.predict(X_test)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 2: t-SNE for Clustering Visualization\n",
    "Visualize how well t-SNE preserves cluster structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load libraries\n",
    "from sklearn.cluster import KMeans\n",
    "\n",
    "# Apply K-means clustering\n",
    "kmeans = KMeans(n_clusters=10, random_state=2024)\n",
    "cluster_labels = kmeans.fit_predict(digits.data)\n",
    "\n",
    "# Create side-by-side plots\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))\n",
    "\n",
    "# Plot using true labels\n",
    "scatter1 = ax1.scatter(X_tsne[:, 0], X_tsne[:, 1], \n",
    "                      c=digits.target, cmap='viridis')\n",
    "ax1.set_title('t-SNE with True Labels')\n",
    "legend1 = ax1.legend(*scatter1.legend_elements(),\n",
    "                    title=\"Digit Classes\",\n",
    "                    loc=\"center left\",\n",
    "                    bbox_to_anchor=(1, 0.5))\n",
    "\n",
    "# Plot using cluster labels\n",
    "scatter2 = ax2.scatter(X_tsne[:, 0], X_tsne[:, 1], \n",
    "                      c=cluster_labels, cmap='viridis')\n",
    "ax2.set_title('t-SNE with K-means Clusters')\n",
    "legend2 = ax2.legend(*scatter2.legend_elements(),\n",
    "                    title=\"Clusters\",\n",
    "                    loc=\"center left\", \n",
    "                    bbox_to_anchor=(1, 0.5))\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
